{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>140.5625</th>\n",
       "      <th>55.68378214</th>\n",
       "      <th>-0.234571412</th>\n",
       "      <th>-0.699648398</th>\n",
       "      <th>3.199832776</th>\n",
       "      <th>19.11042633</th>\n",
       "      <th>7.975531794</th>\n",
       "      <th>74.24222492</th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>102.507812</td>\n",
       "      <td>58.882430</td>\n",
       "      <td>0.465318</td>\n",
       "      <td>-0.515088</td>\n",
       "      <td>1.677258</td>\n",
       "      <td>14.860146</td>\n",
       "      <td>10.576487</td>\n",
       "      <td>127.393580</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>103.015625</td>\n",
       "      <td>39.341649</td>\n",
       "      <td>0.323328</td>\n",
       "      <td>1.051164</td>\n",
       "      <td>3.121237</td>\n",
       "      <td>21.744669</td>\n",
       "      <td>7.735822</td>\n",
       "      <td>63.171909</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>136.750000</td>\n",
       "      <td>57.178449</td>\n",
       "      <td>-0.068415</td>\n",
       "      <td>-0.636238</td>\n",
       "      <td>3.642977</td>\n",
       "      <td>20.959280</td>\n",
       "      <td>6.896499</td>\n",
       "      <td>53.593661</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>88.726562</td>\n",
       "      <td>40.672225</td>\n",
       "      <td>0.600866</td>\n",
       "      <td>1.123492</td>\n",
       "      <td>1.178930</td>\n",
       "      <td>11.468720</td>\n",
       "      <td>14.269573</td>\n",
       "      <td>252.567306</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>93.570312</td>\n",
       "      <td>46.698114</td>\n",
       "      <td>0.531905</td>\n",
       "      <td>0.416721</td>\n",
       "      <td>1.636288</td>\n",
       "      <td>14.545074</td>\n",
       "      <td>10.621748</td>\n",
       "      <td>131.394004</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     140.5625  55.68378214  -0.234571412  -0.699648398  3.199832776  \\\n",
       "0  102.507812    58.882430      0.465318     -0.515088     1.677258   \n",
       "1  103.015625    39.341649      0.323328      1.051164     3.121237   \n",
       "2  136.750000    57.178449     -0.068415     -0.636238     3.642977   \n",
       "3   88.726562    40.672225      0.600866      1.123492     1.178930   \n",
       "4   93.570312    46.698114      0.531905      0.416721     1.636288   \n",
       "\n",
       "   19.11042633  7.975531794  74.24222492  0  \n",
       "0    14.860146    10.576487   127.393580  0  \n",
       "1    21.744669     7.735822    63.171909  0  \n",
       "2    20.959280     6.896499    53.593661  0  \n",
       "3    11.468720    14.269573   252.567306  0  \n",
       "4    14.545074    10.621748   131.394004  0  "
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "df = pd.read_csv('HTRU_2.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>Mean of integrated profile</th>\n",
       "      <th>Standard deviation of integrated profile</th>\n",
       "      <th>Excess kurtosis of integrated profile</th>\n",
       "      <th>Skewness of integrated profile</th>\n",
       "      <th>Mean of DM-SNR curve</th>\n",
       "      <th>Standard deviation of DM-SNR curve</th>\n",
       "      <th>Excess kurtosis of DM-SNR curve</th>\n",
       "      <th>Skewness of DM-SNR curve</th>\n",
       "      <th>Class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>140.562500</td>\n",
       "      <td>55.683782</td>\n",
       "      <td>-0.234571</td>\n",
       "      <td>-0.699648</td>\n",
       "      <td>3.199833</td>\n",
       "      <td>19.110426</td>\n",
       "      <td>7.975532</td>\n",
       "      <td>74.242225</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>102.507812</td>\n",
       "      <td>58.882430</td>\n",
       "      <td>0.465318</td>\n",
       "      <td>-0.515088</td>\n",
       "      <td>1.677258</td>\n",
       "      <td>14.860146</td>\n",
       "      <td>10.576487</td>\n",
       "      <td>127.393580</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>103.015625</td>\n",
       "      <td>39.341649</td>\n",
       "      <td>0.323328</td>\n",
       "      <td>1.051164</td>\n",
       "      <td>3.121237</td>\n",
       "      <td>21.744669</td>\n",
       "      <td>7.735822</td>\n",
       "      <td>63.171909</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>136.750000</td>\n",
       "      <td>57.178449</td>\n",
       "      <td>-0.068415</td>\n",
       "      <td>-0.636238</td>\n",
       "      <td>3.642977</td>\n",
       "      <td>20.959280</td>\n",
       "      <td>6.896499</td>\n",
       "      <td>53.593661</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>88.726562</td>\n",
       "      <td>40.672225</td>\n",
       "      <td>0.600866</td>\n",
       "      <td>1.123492</td>\n",
       "      <td>1.178930</td>\n",
       "      <td>11.468720</td>\n",
       "      <td>14.269573</td>\n",
       "      <td>252.567306</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Mean of integrated profile Standard deviation of integrated profile  \\\n",
       "0                 140.562500                                55.683782   \n",
       "1                 102.507812                                58.882430   \n",
       "2                 103.015625                                39.341649   \n",
       "3                 136.750000                                57.178449   \n",
       "4                  88.726562                                40.672225   \n",
       "\n",
       "  Excess kurtosis of integrated profile Skewness of integrated profile  \\\n",
       "0                             -0.234571                      -0.699648   \n",
       "1                              0.465318                      -0.515088   \n",
       "2                              0.323328                       1.051164   \n",
       "3                             -0.068415                      -0.636238   \n",
       "4                              0.600866                       1.123492   \n",
       "\n",
       "  Mean of DM-SNR curve Standard deviation of DM-SNR curve  \\\n",
       "0             3.199833                          19.110426   \n",
       "1             1.677258                          14.860146   \n",
       "2             3.121237                          21.744669   \n",
       "3             3.642977                          20.959280   \n",
       "4             1.178930                          11.468720   \n",
       "\n",
       "  Excess kurtosis of DM-SNR curve Skewness of DM-SNR curve Class  \n",
       "0                        7.975532                74.242225     0  \n",
       "1                       10.576487               127.393580     0  \n",
       "2                        7.735822                63.171909     0  \n",
       "3                        6.896499                53.593661     0  \n",
       "4                       14.269573               252.567306     0  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('HTRU_2.csv', header = None)\n",
    "df.columns = [['Mean of integrated profile', 'Standard deviation of integrated profile', \n",
    "               'Excess kurtosis of integrated profile', 'Skewness of integrated profile',\n",
    "               'Mean of DM-SNR curve', 'Standard deviation of DM-SNR curve',\n",
    "               'Excess kurtosis of DM-SNR curve', 'Skewness of DM-SNR curve', 'Class' ]]\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 17898 entries, 0 to 17897\n",
      "Data columns (total 9 columns):\n",
      "(Mean of integrated profile,)                  17898 non-null float64\n",
      "(Standard deviation of integrated profile,)    17898 non-null float64\n",
      "(Excess kurtosis of integrated profile,)       17898 non-null float64\n",
      "(Skewness of integrated profile,)              17898 non-null float64\n",
      "(Mean of DM-SNR curve,)                        17898 non-null float64\n",
      "(Standard deviation of DM-SNR curve,)          17898 non-null float64\n",
      "(Excess kurtosis of DM-SNR curve,)             17898 non-null float64\n",
      "(Skewness of DM-SNR curve,)                    17898 non-null float64\n",
      "(Class,)                                       17898 non-null int64\n",
      "dtypes: float64(8), int64(1)\n",
      "memory usage: 1.2 MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "17898"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Exercise152 begins from here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import cross_val_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = df.iloc[:, 0:8]\n",
    "y = df.iloc[:, 8]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def clf_model(model):\n",
    "    clf = model\n",
    "\n",
    "    scores = cross_val_score(clf, X, y)\n",
    "\n",
    "    print('Scores:', scores)\n",
    "    print('Mean score:', scores.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Scores: [0.9740238  0.98223265 0.97686505]\n",
      "Mean score: 0.9777071651161332\n"
     ]
    }
   ],
   "source": [
    "clf_model(LogisticRegression(random_state = 0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Exercise153 begins from here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Scores: [0.95692978 0.92474019 0.94836547]\n",
      "Mean score: 0.9433451467026904\n"
     ]
    }
   ],
   "source": [
    "from sklearn.naive_bayes import GaussianNB\n",
    "clf_model(GaussianNB())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Scores: [0.96899615 0.97200805 0.97082984]\n",
      "Mean score: 0.9706113439320188\n"
     ]
    }
   ],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "clf_model(KNeighborsClassifier())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Scores: [0.96782303 0.96161582 0.9673093 ]\n",
      "Mean score: 0.9655827179728252\n"
     ]
    }
   ],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "clf_model(DecisionTreeClassifier(random_state = 0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Scores: [0.97586727 0.97820986 0.97720034]\n",
      "Mean score: 0.9770924870413066\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "clf_model(RandomForestClassifier(random_state = 0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Exercise154 begins from here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Class    17898\n",
       "dtype: int64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.Class.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Class    1639\n",
       "dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.Class == 1].Class.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Class    0.091574\n",
       "dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.Class == 1].Class.count()/df.Class.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "def confusion(model):\n",
    "    clf = model\n",
    "    clf.fit(X_train, y_train)\n",
    "    y_pred = clf.predict(X_test)\n",
    "    print('Confusion Matrix:', confusion_matrix(y_test, y_pred))\n",
    "    print('Classification Report:', classification_report(y_test, y_pred))\n",
    "    return clf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Confusion Matrix: [[4094   21]\n",
      " [  63  297]]\n",
      "Classification Report:              precision    recall  f1-score   support\n",
      "\n",
      "          0       0.98      0.99      0.99      4115\n",
      "          1       0.93      0.82      0.88       360\n",
      "\n",
      "avg / total       0.98      0.98      0.98      4475\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
       "          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
       "          penalty='l2', random_state=0, solver='liblinear', tol=0.0001,\n",
       "          verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "confusion(LogisticRegression(random_state = 0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Confusion Matrix: [[4077   38]\n",
      " [  69  291]]\n",
      "Classification Report:              precision    recall  f1-score   support\n",
      "\n",
      "          0       0.98      0.99      0.99      4115\n",
      "          1       0.88      0.81      0.84       360\n",
      "\n",
      "avg / total       0.98      0.98      0.98      4475\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
       "           metric_params=None, n_jobs=1, n_neighbors=5, p=2,\n",
       "           weights='uniform')"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "confusion(KNeighborsClassifier())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Confusion Matrix: [[3946  169]\n",
      " [  52  308]]\n",
      "Classification Report:              precision    recall  f1-score   support\n",
      "\n",
      "          0       0.99      0.96      0.97      4115\n",
      "          1       0.65      0.86      0.74       360\n",
      "\n",
      "avg / total       0.96      0.95      0.95      4475\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GaussianNB(priors=None)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "confusion(GaussianNB())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Confusion Matrix: [[4093   22]\n",
      " [  61  299]]\n",
      "Classification Report:              precision    recall  f1-score   support\n",
      "\n",
      "          0       0.99      0.99      0.99      4115\n",
      "          1       0.93      0.83      0.88       360\n",
      "\n",
      "avg / total       0.98      0.98      0.98      4475\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
       "            max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "            min_samples_leaf=1, min_samples_split=2,\n",
       "            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n",
       "            oob_score=False, random_state=0, verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "confusion(RandomForestClassifier(random_state = 0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Exercise156 begins from here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Scores: [0.97519692 0.98122695 0.97652976]\n",
      "Mean score: 0.9776512086736252\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import AdaBoostClassifier\n",
    "clf_model(AdaBoostClassifier(random_state = 0))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Confusion Matrix: [[4094   21]\n",
      " [  63  297]]\n",
      "Classification Report:              precision    recall  f1-score   support\n",
      "\n",
      "          0       0.98      0.99      0.99      4115\n",
      "          1       0.93      0.82      0.88       360\n",
      "\n",
      "avg / total       0.98      0.98      0.98      4475\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,\n",
       "          learning_rate=1.0, n_estimators=50, random_state=0)"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "confusion(AdaBoostClassifier(random_state = 0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python_Workshop",
   "language": "python",
   "name": "python_workshop"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
